#===============================================================================
# 9_image_study_figure_1_appA.R
# Purpose: To replicate Figure 1, Appendix A of the paper: 
#          "Inter-Rater Reliability statistics"
#===============================================================================

# PACKAGES
#===============================================================================
library(dplyr)
library(tidyr)
library(rio)
library(stringr)
library(irr)
library(ggplot2)
library(hrbrthemes)
extrafont::loadfonts()
hrbrthemes::import_roboto_condensed()
library(ggsci)


# DATA
#===============================================================================
# - load data with info on IRR stats reported in meta-analysis papers
#   (read_csv(), cols() and col_character() are readr functions; readr is not
#   attached in the PACKAGES section, so the calls are namespace-qualified)
#   irr_value is read as text because it is cleaned before being converted
#   to numeric further below
meta_db <- readr::read_csv(
  "./data/clean_meta_data_w_std_irr_info.csv",
  col_types = readr::cols(irr_value = readr::col_character())
)


# - clean the IRR values, relabel the statistic types and lump the rare ones
#   (all steps sit in a single mutate(); later expressions see earlier results)
meta_db05 <- meta_db %>%
  mutate(
    # ... the one value stored as "097" is rewritten as the proportion "0.97"
    irr_value = replace(irr_value, which(irr_value == "097"), "0.97"),
    # ... make sure IRR stats are numeric
    irr_value = as.numeric(irr_value),
    # ... human-readable labels for the IRR stats; codes that are not listed
    #     here are left unchanged by recode()
    #     NOTE(review): `fless` looks like a misspelling of "fleiss"; it
    #     presumably mirrors the raw data (as `krippendorf` does) -- confirm
    irr_stat = recode(irr_stat, !!!c(
      "percent-agreement" = "Percent Agreement",
      "krippendorff" = "Krippendorff's Alpha",
      "cronbach" = "Cronbach's Alpha",
      "gwet" = "Gwet's AC1",
      "cohen" = "Cohen's Kappa",
      "krippendorf" = "Krippendorff's Alpha",
      "light" = "Light's Kappa",
      "fless" = "Fleiss' Kappa",
      "irr-unspecified" = "Unspecified IRR statistic",
      "tau" = "Kendall's Tau",
      "correlation" = "Correlation",
      "precision" = "Precision",
      "winer" = "Winer"
    )),
    # ... cluster some of the less common ones into an Other category
    irr_stat_simple = replace(
      irr_stat,
      irr_stat %in% c(
        "Fleiss' Kappa", "Light's Kappa", "Winer", "Gwet's AC1",
        "Cronbach's Alpha", "Kendall's Tau", "Precision"
      ),
      "Other"
    ),
    # ... fixed level order; labels outside this set become NA
    irr_stat_simple = factor(
      irr_stat_simple,
      levels = c(
        "Other",
        "Correlation",
        "Unspecified IRR statistic",
        "Cohen's Kappa",
        "Krippendorff's Alpha",
        "Percent Agreement"
      )
    )
  )

# - average IRR value (ignoring missing values) and number of rows per
#   simplified statistic type
meta_db05_avg <- summarise(
  group_by(meta_db05, irr_stat_simple),
  avg_irr = mean(irr_value, na.rm = TRUE),
  n = n()
)
# ... sort by the average and freeze that order into the factor levels, so the
#     levels run from the lowest to the highest average
meta_db05_avg <- arrange(meta_db05_avg, avg_irr)
meta_db05_avg$irr_stat_simple <- factor(
  meta_db05_avg$irr_stat_simple,
  levels = meta_db05_avg$irr_stat_simple
)

# MAIN
#===============================================================================
# - the plot: one point per reported IRR value, grouped by (simplified)
#   statistic type, with a "|" mark at each type's average
irr_plot <- ggplot(
  meta_db05 %>%
    # ... order the statistic types by their average IRR value
    mutate(irr_stat_simple = factor(
      irr_stat_simple, levels = levels(meta_db05_avg$irr_stat_simple)
    )),
  aes(x = irr_stat_simple, y = irr_value)
) +
  # ... semi-transparent fill plus a hollow outline (pch = 1) for each value
  geom_point(alpha = 0.3, size = 6) +
  geom_point(size = 6, pch = 1) +
  # ... per-type averages come from their own data frame
  geom_point(inherit.aes = FALSE,
             data = meta_db05_avg,
             aes(x = irr_stat_simple, y = avg_irr),
             shape = "|", size = 15) +
  # ... statistic types on the vertical axis, values on the horizontal one
  coord_flip() +
  scale_y_continuous("Inter Rater Reliability statistic",
                     breaks = seq(0, 1, 0.1)) +
  scale_x_discrete("") +
  theme(axis.text.y = element_text(size = 16),
        axis.text.x = element_text(size = 16),
        axis.title.x = element_text(size = 12),
        panel.background = element_blank(),
        panel.grid.major = element_line(color = "gray90"))

# ... show the figure (a bare top-level expression auto-prints, as before)
irr_plot

# ... save the figure: the plot is passed explicitly instead of relying on
#     last_plot(), and the output folder is created first because ggsave()
#     fails when the target directory does not exist
irr_plot_file <- "./results/appendix_a/appA_1_IRR_meta.pdf"
dir.create(dirname(irr_plot_file), recursive = TRUE, showWarnings = FALSE)
ggsave(irr_plot_file, plot = irr_plot,
       width = 30, height = 18, units = "cm", dpi = 300)
